// Source code of org.python.modules._codecs

/*
 * Copyright 2000 Finn Bock
 *
 * This program contains material copyrighted by:
 * Copyright (c) Corporation for National Research Initiatives.
 * Originally written by Marc-Andre Lemburg (mal@lemburg.com).
 */


package org.python.modules;


import org.python.core.Py;
import org.python.core.PyInteger;
import org.python.core.PyObject;
import org.python.core.PyString;
import org.python.core.PyTuple;
import org.python.core.PyUnicode;
import org.python.core.codecs;


public class _codecs {


    public static void register(PyObject search_function) {
        codecs.register(search_function);
    }


    public static PyTuple lookup(String encoding) {
        return codecs.lookup(encoding);
    }


    private static PyTuple decode_tuple(String s, int len) {
        return new PyTuple(new PyObject[] { new PyUnicode(s), Py.newInteger(len) });
    }


    private static PyTuple encode_tuple(String s, int len) {
        return new PyTuple(new PyObject[] { Py.java2py(s), Py.newInteger(len) });
    }


    /* --- UTF-8 Codec --------------------------------------------------- */


    public static PyTuple utf_8_decode(String str) {
        return utf_8_decode(str, null);
    }


    public static PyTuple utf_8_decode(String str, String errors) {
        int size = str.length();
        return decode_tuple(codecs.PyUnicode_DecodeUTF8(str, errors), size);
    }


    public static PyTuple utf_8_encode(String str) {
        return utf_8_encode(str, null);
    }


    public static PyTuple utf_8_encode(String str, String errors) {
        int size = str.length();
        return encode_tuple(codecs.PyUnicode_EncodeUTF8(str, errors), size);
    }


    /* --- Character Mapping Codec --------------------------------------- */


    public static PyTuple charmap_decode(String str, String errors, PyObject mapping) {
        int size = str.length();
        StringBuffer v = new StringBuffer(size);


        for (int i = 0; i < size; i++) {
            char ch = str.charAt(i);
            if (ch > 0xFF) {
                codecs.decoding_error("charmap", v, errors, "ordinal not in range(255)");
                i++;
                continue;
            }


            PyObject w = Py.newInteger(ch);
            PyObject x = mapping.__finditem__(w);
            if (x == null) {
                /* No mapping found: default to Latin-1 mapping if possible */
                v.append(ch);
                continue;
            }


            /* Apply mapping */
            if (x instanceof PyInteger) {
                int value = ((PyInteger) x).getValue();
                if (value < 0 || value > 65535)
                    throw Py.TypeError("character mapping must be in range(65535)");
                v.append((char) value);
            } else if (x == Py.None) {
                codecs.decoding_error("charmap", v, errors, "character maps to <undefined>");
            } else if (x instanceof PyString) {
                v.append(x.toString());
            } else {
                /* wrong return value */
                throw Py.TypeError("character mapping must return integer, " + "None or unicode");
            }
        }
        return decode_tuple(v.toString(), size);
    }


    public static PyTuple charmap_encode(String str, String errors, PyObject mapping) {
        int size = str.length();
        StringBuffer v = new StringBuffer(size);


        for (int i = 0; i < size; i++) {
            char ch = str.charAt(i);
            PyObject w = Py.newInteger(ch);
            PyObject x = mapping.__finditem__(w);
            if (x == null) {
                /* No mapping found: default to Latin-1 mapping if possible */
                if (ch < 256)
                    v.append(ch);
                else
                    codecs.encoding_error("charmap", v, errors, "missing character mapping");
                continue;
            }
            if (x instanceof PyInteger) {
                int value = ((PyInteger) x).getValue();
                if (value < 0 || value > 255)
                    throw Py.TypeError("character mapping must be in range(256)");
                v.append((char) value);
            } else if (x == Py.None) {
                codecs.encoding_error("charmap", v, errors, "character maps to <undefined>");
            } else if (x instanceof PyString) {
                v.append(x.toString());
            } else {
                /* wrong return value */
                throw Py.TypeError("character mapping must return " + "integer, None or unicode");
            }
        }
        return encode_tuple(v.toString(), size);
    }


    /* --- 7-bit ASCII Codec -------------------------------------------- */


    public static PyTuple ascii_decode(String str) {
        return ascii_decode(str, null);
    }


    public static PyTuple ascii_decode(String str, String errors) {
        int size = str.length();
        return decode_tuple(codecs.PyUnicode_DecodeASCII(str, size, errors), size);
    }


    public static PyTuple ascii_encode(String str) {
        return ascii_encode(str, null);
    }


    public static PyTuple ascii_encode(String str, String errors) {
        int size = str.length();
        return encode_tuple(codecs.PyUnicode_EncodeASCII(str, size, errors), size);
    }


    /* --- Latin-1 Codec -------------------------------------------- */


    public static PyTuple latin_1_decode(String str) {
        return latin_1_decode(str, null);
    }


    public static PyTuple latin_1_decode(String str, String errors) {
        int size = str.length();
        StringBuffer v = new StringBuffer(size);


        for (int i = 0; i < size; i++) {
            char ch = str.charAt(i);
            if (ch < 256) {
                v.append(ch);
            } else {
                codecs.decoding_error("latin-1", v, errors, "ordinal not in range(256)");
                i++;
                continue;
            }
        }


        return decode_tuple(v.toString(), size);
    }


    public static PyTuple latin_1_encode(String str) {
        return latin_1_encode(str, null);
    }


    public static PyTuple latin_1_encode(String str, String errors) {
        int size = str.length();
        StringBuffer v = new StringBuffer(size);


        for (int i = 0; i < size; i++) {
            char ch = str.charAt(i);
            if (ch >= 256) {
                codecs.encoding_error("latin-1", v, errors, "ordinal not in range(256)");
            } else
                v.append(ch);
        }
        return encode_tuple(v.toString(), size);
    }


    /* --- UTF16 Codec -------------------------------------------- */


    public static PyTuple utf_16_encode(String str) {
        return utf_16_encode(str, null);
    }


    public static PyTuple utf_16_encode(String str, String errors) {
        return encode_tuple(encode_UTF16(str, errors, 0), str.length());
    }


    public static PyTuple utf_16_encode(String str, String errors, int byteorder) {
        return encode_tuple(encode_UTF16(str, errors, byteorder), str.length());
    }


    public static PyTuple utf_16_le_encode(String str) {
        return utf_16_le_encode(str, null);
    }


    public static PyTuple utf_16_le_encode(String str, String errors) {
        return encode_tuple(encode_UTF16(str, errors, -1), str.length());
    }


    public static PyTuple utf_16_be_encode(String str) {
        return utf_16_be_encode(str, null);
    }


    public static PyTuple utf_16_be_encode(String str, String errors) {
        return encode_tuple(encode_UTF16(str, errors, 1), str.length());
    }


    private static String encode_UTF16(String str, String errors, int byteorder) {
        int size = str.length();
        StringBuffer v = new StringBuffer((size + (byteorder == 0 ? 1 : 0)) * 2);


        if (byteorder == 0) {
            v.append((char) 0xFE);
            v.append((char) 0xFF);
        }


        if (byteorder == 0 || byteorder == 1)
            for (int i = 0; i < size; i++) {
                char ch = str.charAt(i);
                v.append((char) ((ch >>> 8) & 0xFF));
                v.append((char) (ch & 0xFF));
            }
        else {
            for (int i = 0; i < size; i++) {
                char ch = str.charAt(i);
                v.append((char) (ch & 0xFF));
                v.append((char) ((ch >>> 8) & 0xFF));
            }
        }


        return v.toString();
    }


    public static PyTuple utf_16_decode(String str) {
        return utf_16_decode(str, null);
    }


    public static PyTuple utf_16_decode(String str, String errors) {
        int[] bo = new int[] { 0 };
        return decode_tuple(decode_UTF16(str, errors, bo), str.length());
    }


    public static PyTuple utf_16_decode(String str, String errors, int byteorder) {
        int[] bo = new int[] { byteorder };
        return decode_tuple(decode_UTF16(str, errors, bo), str.length());
    }


    public static PyTuple utf_16_le_decode(String str) {
        return utf_16_le_decode(str, null);
    }


    public static PyTuple utf_16_le_decode(String str, String errors) {
        int[] bo = new int[] { -1 };
        return decode_tuple(decode_UTF16(str, errors, bo), str.length());
    }


    public static PyTuple utf_16_be_decode(String str) {
        return utf_16_be_decode(str, null);
    }


    public static PyTuple utf_16_be_decode(String str, String errors) {
        int[] bo = new int[] { 1 };
        return decode_tuple(decode_UTF16(str, errors, bo), str.length());
    }


    public static PyTuple utf_16_ex_decode(String str) {
        return utf_16_ex_decode(str, null);
    }


    public static PyTuple utf_16_ex_decode(String str, String errors) {
        return utf_16_ex_decode(str, errors, 0);
    }


    public static PyTuple utf_16_ex_decode(String str, String errors, int byteorder) {
        int[] bo = new int[] { 0 };
        String s = decode_UTF16(str, errors, bo);
        return new PyTuple(new PyObject[] { Py.newString(s), Py.newInteger(str.length()), Py.newInteger(bo[0]) });
    }


    private static String decode_UTF16(String str, String errors, int[] byteorder) {
        int bo = 0;
        if (byteorder != null)
            bo = byteorder[0];


        int size = str.length();


        if (size % 2 != 0)
            codecs.decoding_error("UTF16", null, errors, "truncated data");


        StringBuffer v = new StringBuffer(size / 2);


        for (int i = 0; i < size; i += 2) {
            char ch1 = str.charAt(i);
            char ch2 = str.charAt(i + 1);
            if (ch1 == 0xFE && ch2 == 0xFF) {
                bo = 1;
                continue;
            } else if (ch1 == 0xFF && ch2 == 0xFE) {
                bo = -1;
                continue;
            }


            char ch;
            if (bo == -1)
                ch = (char) (ch2 << 8 | ch1);
            else
                ch = (char) (ch1 << 8 | ch2);


            if (ch < 0xD800 || ch > 0xDFFF) {
                v.append(ch);
                continue;
            }


            /* UTF-16 code pair: */
            if (i == size - 1) {
                codecs.decoding_error("UTF-16", v, errors, "unexpected end of data");
                continue;
            }


            ch = str.charAt(++i);
            if (0xDC00 <= ch && ch <= 0xDFFF) {
                ch = str.charAt(++i);
                if (0xD800 <= ch && ch <= 0xDBFF)
                    /* This is valid data (a UTF-16 surrogate pair), but
                       we are not able to store this information since our
                       Py_UNICODE type only has 16 bits... this might
                       change someday, even though it's unlikely. */
                    codecs.decoding_error("UTF-16", v, errors, "code pairs are not supported");
                continue;
            }
            codecs.decoding_error("UTF-16", v, errors, "illegal encoding");
        }


        if (byteorder != null)
            byteorder[0] = bo;


        return v.toString();
    }


    /* --- RawUnicodeEscape Codec ----------------------------------------- */


    public static PyTuple raw_unicode_escape_encode(String str) {
        return raw_unicode_escape_encode(str, null);
    }


    public static PyTuple raw_unicode_escape_encode(String str, String errors) {
        return encode_tuple(codecs.PyUnicode_EncodeRawUnicodeEscape(str, errors, false), str.length());
    }


    public static PyTuple raw_unicode_escape_decode(String str) {
        return raw_unicode_escape_decode(str, null);
    }


    public static PyTuple raw_unicode_escape_decode(String str, String errors) {
        return decode_tuple(codecs.PyUnicode_DecodeRawUnicodeEscape(str, errors), str.length());
    }


    /* --- UnicodeEscape Codec -------------------------------------------- */


    public static PyTuple unicode_escape_encode(String str) {
        return unicode_escape_encode(str, null);
    }


    public static PyTuple unicode_escape_encode(String str, String errors) {
        return encode_tuple(PyString.encode_UnicodeEscape(str, false), str.length());
    }


    public static PyTuple unicode_escape_decode(String str) {
        return unicode_escape_decode(str, null);
    }


    public static PyTuple unicode_escape_decode(String str, String errors) {
        int n = str.length();
        return decode_tuple(PyString.decode_UnicodeEscape(str, 0, n, errors, true), n);
    }


    /* --- UnicodeInternal Codec ------------------------------------------ */


    public static PyTuple unicode_internal_encode(String str) {
        return unicode_internal_encode(str, null);
    }


    public static PyTuple unicode_internal_encode(String str, String errors) {
        return encode_tuple(str, str.length());
    }


    public static PyTuple unicode_internal_decode(String str) {
        return unicode_internal_decode(str, null);
    }


    public static PyTuple unicode_internal_decode(String str, String errors) {
        return decode_tuple(str, str.length());
    }


}
// Source code of org.python.modules._codecs

// Related classes of org.python.modules._codecs